This document creates plots of the network of Jaccard similarity indices for some of the exemplars rated as most self-similar. It builds on the exploratory work contained in graph-network-visualizations.Rmd.
The Jaccard index data are found in analysis/data/jaccard.csv.
jaccard_raw <- readr::read_csv("analysis/data/jaccard.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Exemplar.Row = col_double(),
## Exemplar.Col = col_double(),
## Jaccard = col_double(),
## Group = col_character()
## )
str(jaccard_raw)
## spec_tbl_df [950 Ă— 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Exemplar.Row: num [1:950] 1 1 1 1 1 1 1 1 1 1 ...
## $ Exemplar.Col: num [1:950] 2 2 2 2 2 3 3 3 3 3 ...
## $ Jaccard : num [1:950] 0.0476 0.1186 0.1228 0.2 0.2692 ...
## $ Group : chr [1:950] "P31M" "P3M1" "P6M" "P6" ...
## - attr(*, "spec")=
## .. cols(
## .. Exemplar.Row = col_double(),
## .. Exemplar.Col = col_double(),
## .. Jaccard = col_double(),
## .. Group = col_character()
## .. )
It’s probably wise to reorder the data frame by wallpaper group, Jaccard index, and exemplar index.
jaccard <- jaccard_raw %>%
dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))
Let’s add a Jaccard mean and median by Exemplar.Row.
jaccard_aug <- jaccard %>%
dplyr::group_by(., Group, Exemplar.Row) %>%
dplyr::mutate(.,
j_mean = mean(Jaccard),
j_med = median(Jaccard),
j_max = max(Jaccard),
j_min = min(Jaccard)
)
For each wallpaper group, pick the exemplar pair with the most extreme (highest) Jaccard value. Then plot the set of Jaccard indices for both members of the pair.
Create helper function to pick most extreme pair.
pick_n_pairs_max_jaccard <- function(wp_group = "P1", df = jaccard, n_pairs = 1) {
this_df <- df %>%
dplyr::filter(., Group == wp_group) %>%
dplyr::arrange(., desc(Jaccard))
this_df[1:n_pairs,]
}
Now, do this for all of the wallpaper groups.
wp_groups <- c("P1", "P31M", "P3M1", "P6", "P6M")
exemplars_max_jaccard <- purrr::map_df(wp_groups, pick_n_pairs_max_jaccard)
exemplars_max_jaccard
## # A tibble: 5 x 4
## Exemplar.Row Exemplar.Col Jaccard Group
## <dbl> <dbl> <dbl> <chr>
## 1 8 9 0.435 P1
## 2 2 7 0.65 P31M
## 3 19 20 0.404 P3M1
## 4 6 13 0.558 P6
## 5 10 20 0.383 P6M
Let’s create a simplified color scale.
value_breaks <- c(0, .2, .4, .6, .8)
value_colors <-
colorRampPalette(RColorBrewer::brewer.pal(4, "Oranges"))(4)
legend_text <- c("<.2", ".2-.4", ".4-.6", ">.6")
Create helper function to generate heatmaps.
plot_heatmap <- function(wp_group = "P1",
df = jaccard,
show_legend = FALSE) {
# Select wp_group
this_df <- df %>%
dplyr::filter(., Group == wp_group)
# Turn Jaccard data into matrix
j_matrix <- matrix(nrow = 20, ncol = 20)
for (r in 1:190) {
j_matrix[this_df$Exemplar.Row[r], this_df$Exemplar.Col[r]] <-
this_df$Jaccard[r]
}
title_txt <- paste0(
wp_group,
": max= ",
format(max(this_df$Jaccard), digits = 2, nsmall = 2),
" | mean= ",
format(
mean(this_df$Jaccard),
digits = 2,
nsmall = 2
)
)
# value_breaks <- c(0, .2, .4, .6, .8)
# value_colors <-
# colorRampPalette(RColorBrewer::brewer.pal(4, "Oranges"))(4)
heatmap(
j_matrix,
Rowv = NA,
Colv = NA,
main = title_txt,
symm = TRUE,
col = value_colors,
breaks = value_breaks
)
if (show_legend) {
legend(
x = "bottomright",
legend = legend_text,
fill = value_colors
)
}
# if (save_to_file) {
# png(paste0("img/", wp_group, "-", "jaccard-heatmap.png"))
# heatmap(
# j_matrix,
# Rowv = NA,
# Colv = NA,
# main = title_txt,
# symm = TRUE,
# col = value_colors,
# breaks = value_breaks
# )
#
# if (show_legend) {
# legend(
# x = "bottomright",
# legend = c("<.2", ".2-.4", ".4-.6", ">.6"),
# fill = colorRampPalette(RColorBrewer::brewer.pal(4, "Oranges"))(4)
# )
# }
# dev.off()
#}
}
Test the function with default values.
plot_heatmap("P1")
Now, let’s plot the same for all wallpaper groups.
plot_heatmap("P31M")
plot_heatmap("P3M1")
plot_heatmap("P6")
plot_heatmap("P6M")
These are saved in img/.
Now, for each member of the most similar exemplar pair, we show the connectivity network.
Create helper function.
make_jaccard_network <- function(wp_group = "P1", df = jaccard) {
this_df <- df %>%
dplyr::filter(., Group == wp_group) %>%
dplyr::arrange(., Exemplar.Row, Exemplar.Col)
this_edges <- tibble(
from = this_df$Exemplar.Row,
to = this_df$Exemplar.Col,
weight = this_df$Jaccard
)
this_nodes <- tibble::tibble(id = 1:20)
tidygraph::tbl_graph(nodes = this_nodes,
edges = this_edges,
directed = FALSE)
}
Test with default parameters.
(p1_df <- make_jaccard_network())
## # A tbl_graph: 20 nodes and 190 edges
## #
## # An undirected simple graph with 1 component
## #
## # Node Data: 20 x 1 (active)
## id
## <int>
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## # … with 14 more rows
## #
## # Edge Data: 190 x 3
## from to weight
## <int> <int> <dbl>
## 1 1 2 0.269
## 2 1 3 0.179
## 3 1 4 0.119
## # … with 187 more rows
Select a specific exemplar and categorize the Jaccard index values.
select_exemplar <- function(network_df = make_jaccard_network(), exemplar_id = 8) {
df <- network_df %>%
activate(edges) %>%
dplyr::filter(., from == exemplar_id | to == exemplar_id) %>%
dplyr::mutate(weight = cut(weight, c(0, .2, .4, .6, .8),
labels = c("<.2", ".2-.4", ".4-.6", ">.6")))
df
}
select_exemplar()
## # A tbl_graph: 20 nodes and 19 edges
## #
## # An unrooted tree
## #
## # Edge Data: 19 x 3 (active)
## from to weight
## <int> <int> <fct>
## 1 1 8 <.2
## 2 2 8 .2-.4
## 3 3 8 .2-.4
## 4 4 8 .2-.4
## 5 5 8 <.2
## 6 6 8 <.2
## # … with 13 more rows
## #
## # Node Data: 20 x 1
## id
## <int>
## 1 1
## 2 2
## 3 3
## # … with 17 more rows
Now, plot the edge values.
ggraph(select_exemplar(), layout = "linear", circular = TRUE) +
geom_edge_arc(aes(color = weight)) +
geom_node_text(aes(label = id), size = 6) +
theme_graph() +
coord_fixed() +
# NOTE: the drop = FALSE ensures that the full range of scales is used!
scale_edge_color_manual(name = "Jaccard",
values = value_colors,
drop = FALSE) +
theme(legend.text = element_text(size = 14)) +
theme(legend.title = element_text(size = 16))
plot_jaccard_vals <-
function(network_df = make_jaccard_network(),
exemplar_id = 8,
wp_group = "P1") {
df <- select_exemplar(network_df, exemplar_id)
ggraph(df, layout = "linear", circular = TRUE) +
geom_edge_arc(aes(color = weight)) +
geom_node_text(aes(label = id), size = 6) +
theme_graph() +
coord_fixed() +
# NOTE: the drop = FALSE ensures that the full range of scales is used!
scale_edge_color_manual(name = "Jaccard",
values = value_colors,
drop = FALSE) +
theme(legend.text = element_text(size = 14)) +
theme(legend.title = element_text(size = 16))
}
Test with default parameters.
plot_jaccard_vals()
And its companion.
plot_jaccard_vals(exemplar_id = 9)
plot_jaccard_vals(make_jaccard_network(wp_group = "P31M", df = jaccard), exemplar_id = 2, wp_group = "P31M")
plot_jaccard_vals(make_jaccard_network(wp_group = "P31M", df = jaccard), exemplar_id = 7, wp_group = "P31M")
plot_jaccard_vals(make_jaccard_network(wp_group = "P3M1", df = jaccard), exemplar_id = 19, wp_group = "P3M1")
plot_jaccard_vals(make_jaccard_network(wp_group = "P3M1", df = jaccard), exemplar_id = 20, wp_group = "P3M1")
plot_jaccard_vals(make_jaccard_network(wp_group = "P6", df = jaccard), exemplar_id = 6, wp_group = "P6")
plot_jaccard_vals(make_jaccard_network(wp_group = "P6", df = jaccard), exemplar_id = 13, wp_group = "P6")
plot_jaccard_vals(make_jaccard_network(wp_group = "P6M", df = jaccard), exemplar_id = 10, wp_group = "P6M")
plot_jaccard_vals(make_jaccard_network(wp_group = "P6M", df = jaccard), exemplar_id = 20, wp_group = "P6M")